# -*- coding: utf-8 -*-
# time: 2025/4/17 10:53
# file: CLIP_model_hf.py
# author: hanson
"""
案例1：调用CLIP模型（HuggingFace）
"""

# Install dependencies: pip install torch transformers pillow
from PIL import Image
import torch
from transformers import CLIPProcessor, CLIPModel

# Load the pretrained CLIP model and its matching processor
# (weights are downloaded from the HuggingFace Hub on first use)
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
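
# Optional (a sketch, commented out so the script stays CPU-only by default):
# run on GPU when one is available. If enabled, the `inputs` built below must
# be moved to the same device, e.g. `inputs = inputs.to(device)`.
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model = model.to(device)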

image = Image.open("./image/cat.1.jpg")  # replace with your own image path
texts = ["a cat", "a dog", "a tree"]
# The processor tokenizes the texts and resizes/normalizes the image in one call
inputs = processor(text=texts, images=image, return_tensors="pt", padding=True)
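
# A quick sanity check on what the processor produced: token ids and attention
# mask for the texts, plus a normalized pixel tensor for the image.
for name, tensor in inputs.items():
    print(name, tuple(tensor.shape))  # expect input_ids, attention_mask, pixel_values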

# Inference only, so skip gradient tracking
with torch.no_grad():
    outputs = model(**inputs)

# logits_per_image has shape (num_images, num_texts); softmax over the text
# dimension turns the similarity scores into probabilities
probs = outputs.logits_per_image.softmax(dim=1)
print("Predictions:", {text: f"{prob:.2%}" for text, prob in zip(texts, probs[0].tolist())})